## loading libraries
library(ggplot2)
library(ggpubr)
library(Seurat)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
library(readxl)
library(corto)
library(Matrix)
library(EnhancedVolcano)
## Loading required package: ggrepel
## Initilization 
set.seed(0)
label_size=8
point_size=1
cell_type_genes=list("Bowman’s glands"=c("SOX9", "SOX10", "GPX3"),
                     "olfactory HBCs"=c("TP63", "KRT5", "CXCL14", "SOX2", "MEG3"),
                     "olfactory ensheathing glia"=c("S100B", "PLP1", "PMP2","MPZ", "ALX3"),
                     "olfactory microvillar cells"=c("ASCL3", "CFTR", "HEPACAM2", "FOXL1"),
                     "immature neurons"=c("GNG8", "OLIG2", "EBF2", "LHX2", "CBX8"),
                     "mature neurons"=c("GNG13", "EBF2", "CBX8", "RTP1"),
                     "GBCs"=c("HES6", "ASCL1", "CXCR4", "SOX2", "EZH2", "NEUROD1", "NEUROG1"),
                     "sustentacular cells"=c("CYP2A13", "CYP2J2", "GPX6", "ERMN", "SOX2"))
cell_type_genes_temp=list("Bowman’s glands"=c("SOX9", "GPX3"),
                          "olfactory HBCs"=c("CXCL14", "MEG3"),
                          "olfactory ensheathing glia"=c("S100B", "PLP1"),
                          "olfactory microvillar cells"=c( "ASCL3", "HEPACAM2"),
                          "immature neurons"=c("GNG8","OLIG2"),
                          "mature neurons"=c("GNG13","RTP1"),
                          "GBCs"=c("CXCR4", "SOX2"),
                          "sustentacular cells"=c("CYP2A13", "ERMN"))
marker_genes=c("SOX9", "SOX10", "GPX3",
               "TP63", "KRT5", "CXCL14", "SOX2", "MEG3",
               "S100B", "PLP1", "PMP2","MPZ", "ALX3",
               "ASCL3", "CFTR", "HEPACAM2", "FOXL1",
               "GNG8", "OLIG2", "EBF2", "LHX2", "CBX8",
               "GNG13", "EBF2", "CBX8", "RTP1",
               "HES6", "ASCL1", "CXCR4", "SOX2", "EZH2", "NEUROD1", "NEUROG1",
               "CYP2A13", "CYP2J2", "GPX6", "ERMN", "SOX2")
marker_genes_temp=c("SOX9", "GPX3","CXCL14", "MEG3","S100B", "PLP1", "ASCL3", "HEPACAM2",
                    "GNG8","OLIG2","GNG13","RTP1","CXCR4", "SOX2","CYP2A13", "ERMN")
cell_type_names=c("Bowman’s glands",
                  "olfactory HBCs",
                  "olfactory ensheathing glia",
                  "olfactory microvillar cells",
                  "immature neurons",
                  "mature neurons",
                  "GBCs",
                  "sustentacular cells")

## Loading paitent P2 and P3 matrices and formation of Seurat object
batch_list=list("P2","P3")
batch_data_list=list("P2"=1,"P3"=1)
for( i in 1:length(batch_list))
{
  print(batch_list[[i]])
  s_object=Read10X(paste("~/corona_project/Input_files/",batch_list[[i]],sep=""))
  s_object=CreateSeuratObject(counts =s_object, min.cells = 0, min.features = 400, project = "P23")
  s_object[["percent.mt"]] <- PercentageFeatureSet(s_object, pattern = "^MT-")
  s_object <- subset(s_object, subset = nFeature_RNA >100 & nFeature_RNA <8000 & percent.mt <10)
  s_object@meta.data[, "run"] <- batch_list[i]
  s_object=NormalizeData(s_object)
  batch_data_list[[i]]=FindVariableFeatures(s_object, selection.method = "vst", nfeatures =5000)
}
## [1] "P2"
## [1] "P3"
batch_data_list
## $P2
## An object of class Seurat 
## 33538 features across 10881 samples within 1 assay 
## Active assay: RNA (33538 features, 5000 variable features)
## 
## $P3
## An object of class Seurat 
## 33538 features across 5415 samples within 1 assay 
## Active assay: RNA (33538 features, 5000 variable features)
saveRDS(batch_data_list,"~/corona_project/batch_data_list_5000_23.rds")

## Integration and batch effect removing using Seurat
set.seed(1)
run.anchors <- FindIntegrationAnchors(object.list = batch_data_list, dims = 1:30,anchor.features = 5000)
## Computing 5000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 11489 anchors
## Filtering anchors
##  Retained 8787 anchors
## Extracting within-dataset neighbors
a=run.anchors
run.combined <- IntegrateData(anchorset = run.anchors, dims = 1:30)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
b=run.combined
sce_3_1_1_before=run.combined
saveRDS(sce_3_1_1_before,"~/corona_project/sce_3_1_1_before_5000_23.rds")

## Dim reduction and clustering
DefaultAssay(run.combined) <- "integrated"
run.combined <- ScaleData(run.combined, verbose = FALSE)
run.combined <- RunPCA(run.combined, npcs = 30, verbose = FALSE)
run.combined <- RunUMAP(run.combined, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:37:31 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:37:31 Read 16296 rows and found 30 numeric columns
## 21:37:31 Using Annoy for neighbor search, n_neighbors = 30
## 21:37:31 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:37:34 Writing NN index file to temp file /tmp/Rtmp99y6nN/file73417bb894fd
## 21:37:34 Searching Annoy index using 1 thread, search_k = 3000
## 21:37:38 Annoy recall = 100%
## 21:37:39 Commencing smooth kNN distance calibration using 1 thread
## 21:37:39 Initializing from normalized Laplacian + noise
## 21:37:41 Commencing optimization for 200 epochs, with 707164 positive edges
## 21:37:46 Optimization finished
run.combined <- FindNeighbors(run.combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
run.combined <- FindClusters(run.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 16296
## Number of edges: 654023
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9526
## Number of communities: 22
## Elapsed time: 2 seconds
## Cell-types annotation
for(i in 0:as.integer(length(marker_genes)/4))
{
  a=(i*4+1)
  b=((i+1)*4)
  if(i==as.integer(length(marker_genes)/4))
    b=length(marker_genes)
  if(a>length(marker_genes))
     next
  print(i)
  print(VlnPlot(run.combined, features = marker_genes[a:b],ncol=2))
  print(FeaturePlot(run.combined, features = marker_genes[a:b],cols=c("lightgrey", "red"),label = TRUE,pt.size=2,order = TRUE))
}
## [1] 0

## [1] 1

## [1] 2

## [1] 3

## [1] 4
## Warning: Could not find FOXL1 in the default search locations, found in RNA
## assay instead

## Warning: Could not find FOXL1 in the default search locations, found in RNA
## assay instead

## [1] 5
## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## [1] 6
## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## [1] 7

## [1] 8

## [1] 9

run.combined<-RenameIdents(run.combined,`13`="Immature neurons", `14`="Mature neurons", `3`="Olfactory HBCs",`19`="Olfactory microvillar cells",
                   `5`="Bowman's gland", `16`="Olfactory ensheathing glia", `9`="Sustentacular cells",`20`="GBCs" )
run.combined=subset(run.combined, idents = c("Immature neurons","Mature neurons","Olfactory HBCs","Olfactory microvillar cells","Bowman's gland",
                          "Olfactory ensheathing glia","Sustentacular cells","GBCs"))
sce_3_1_1_after=run.combined
saveRDS(sce_3_1_1_after,"~/corona_project/sce_3_1_1_after_5000_23.rds")

## Ploting heat map for selected gene markers on all clusters
sce_3_1_1_after_5000_23 <- readRDS("~/corona_project/sce_3_1_1_after_5000_23.rds")
DoHeatmap(sce_3_1_1_after_5000_23,features =c(marker_genes_temp),size = 3, draw.lines = FALSE )

## Vln plots with all selected gene markers
plot=list()
for(i in 1:length(marker_genes_temp))
{
  print(i)
  if(i %in% c(1,5,9))
    plot[[i]]<-print(VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]])) +
      NoLegend() +   theme(axis.title.x=element_blank(),
                           axis.text.x=element_blank(),
                           axis.ticks.x=element_blank(),
                           axis.line.x = element_blank())
  if(i %in% c(14,15,16))
    plot[[i]]<-print(VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]])) +
      NoLegend() +   theme(axis.title.y=element_blank(),
                           axis.text.y=element_blank(),
                           axis.ticks.y=element_blank(),
                           axis.line.y = element_blank())
  if(i %in% c(13))
    plot[[i]]<-print(VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]])) +
      NoLegend()
  if(i %in% c(2,3,4,6,7,8,10,11,12))
    plot[[i]]<-print(VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]])) +
      NoLegend()+   theme(axis.title.x=element_blank(),
                          axis.text.x=element_blank(),
                          axis.ticks.x=element_blank(),
                          axis.line.x = element_blank(),
                          axis.title.y=element_blank(),
                          axis.text.y=element_blank(),
                          axis.ticks.y=element_blank(),
                          axis.line.y = element_blank())
  
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

## [1] 12

## [1] 13

## [1] 14

## [1] 15

## [1] 16

cowplot::plot_grid(plotlist = plot,nrow=4,rel_heights = c(1,1,1,2))

## Feature plots with all selected gene markers
plot=list()
plot[[1]]=DimPlot(sce_3_1_1_after_5000_23, reduction = "umap",pt.size = point_size,label=TRUE)+
  labs(title = "") + NoAxes() + NoLegend()
for(i in 1:length(marker_genes_temp))
{
  print(i)
  plot[[i+1]]<-print(FeaturePlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]],cols=c("lightgrey", "red"),
                                 pt.size = point_size,label.size = label_size,order = TRUE)) +
    NoLegend() + NoAxes()
}
## [1] 1

## [1] 2

## [1] 3

## [1] 4

## [1] 5

## [1] 6

## [1] 7

## [1] 8

## [1] 9

## [1] 10

## [1] 11

## [1] 12

## [1] 13

## [1] 14

## [1] 15

## [1] 16

AT=cowplot::plot_grid(plotlist = plot[c(3,4,5)],ncol=3)
AB=cowplot::plot_grid(plotlist = plot[c(12,13,11)],ncol=3)
AR=cowplot::plot_grid(plotlist = plot[c(6,7,8,9,10)],nrow=5)
AL=cowplot::plot_grid(plotlist = plot[c(2,14,15,16,17)],nrow=5)
AC=cowplot::plot_grid(plotlist = plot[c(1)],nrow=1)
ATCB=cowplot::plot_grid(plotlist = list(AT,AC,AB),nrow=3,rel_heights = c(1,4,1))
cowplot::plot_grid(plotlist = list(AL,ATCB,AR),ncol=3,rel_widths = c(1,2,1))

### Dim plot to show all cell types with different colors
DimPlot(sce_3_1_1_after_5000_23, reduction = "umap",pt.size = point_size,label=TRUE)+ labs(title = "") + NoLegend()

DimPlot(sce_3_1_1_after_5000_23, reduction = "umap",pt.size = point_size,label=FALSE)+ labs(title = "")

## FeaturePlot to show expression of gene markers in respective cell markers
### FeaturePlot to show the expression of ACE2 in each cell type
FeaturePlot(run.combined, features = "ACE2",combine = FALSE,cols=c("lightgrey", "red"),pt.size = point_size,label.size = label_size,order =TRUE)
## Warning: Could not find ACE2 in the default search locations, found in RNA assay
## instead
## [[1]]

### FeaturePlot to show the expression of TMPRSS2 in each cell type
FeaturePlot(run.combined, features = "TMPRSS2",combine = FALSE,cols=c("lightgrey", "green"),pt.size = point_size,label.size = label_size,order =TRUE)
## [[1]]

### FeaturePlot to show the expression of CTSL in each cell type
FeaturePlot(run.combined, features = "CTSL",combine = FALSE,cols=c("lightgrey", "blue"),pt.size = point_size,label.size = label_size,order =TRUE)
## Warning: Could not find CTSL in the default search locations, found in RNA assay
## instead
## [[1]]

### FeaturePlot to show the expression of BSG in each cell type
FeaturePlot(run.combined, features = "BSG",combine = FALSE,cols=c("lightgrey", "magenta"),pt.size = point_size,label.size = label_size,order =TRUE)
## [[1]]

## Initiallizing a matrix for 24 * 8, 8 for cells type and 24 for different cases as shown below
count_matrix=matrix(0,24,8)
rownames(count_matrix)<-c("ACE2_count","ACE2_percent","TMPRSS2_count","TMPRSS2_percent",
                          "CTSL_count","CTSL_percent","BSG_count","BSG_percent",
                          "intersect_of_ACE2_TMPRSS2_count","ACE2_contri_percent","TMPRSS2_contri_percent",
                          "intersect_of_ACE2_CTSL_count","ACE2_contri_percent","CTSL_contri_percent",
                          "intersect_of_BSG_TMPRSS2_count","BSG_contri_percent","TMPRSS2_contri_percent",
                          "intersect_of_BSG_CTSL_count","BSG_contri_percent","CSTL_contri_percent",
                          "ACE2_TMPRSS2_CTSL","ACE2_TMPRSS2_CTSB",
                          "BSB_TMPRSS2_CTSL","BSG_TMPRSS2_CTSB")
colnames(count_matrix)<-unique(Idents(run.combined))
### Percentage of ACE2 expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])])
count_matrix[1,names(a)]=a
print(sum(a))
## [1] 32
a=a*100/sum(a)
count_matrix[2,names(a)]=a
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538  3906
### Percentage of TMPRSS2 expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])])
count_matrix[3,names(a)]=a
print(sum(a))
## [1] 666
a=a*100/sum(a)
count_matrix[4,names(a)]=a
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538  3906
### Percentage of CTSL expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])])
count_matrix[5,names(a)]=a
print(sum(a))
## [1] 1631
a=a*100/sum(a)
count_matrix[6,names(a)]=a
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538  3906
### Percentage of BSG expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])])
count_matrix[7,names(a)]=a
print(sum(a))
## [1] 3014
a=a*100/sum(a)
count_matrix[8,names(a)]=a
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538  3906
### Bar plot of intersect of both ACE2 and TMPRSS2
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_ACE2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])]
intersect_names=intersect(names(col_name_ACE2),names(col_name_TMPRSS2))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[9,names(intersect_cells)]=intersect_cells
TMPRSS2_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])])
ACE2_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])])
count_matrix[10,names(ACE2_cells)]=(intersect_cells/ACE2_cells)*100
count_matrix[11,names(TMPRSS2_cells)]=(intersect_cells/TMPRSS2_cells)*100
new_data=data.frame("cells_percenrage"=c(((intersect_cells/TMPRSS2_cells)*100),((intersect_cells/ACE2_cells)*100)),
                    genes=c(rep("TMPRSS2",length(TMPRSS2_cells)),rep("ACE2",length(ACE2_cells))),
                    cell_types=c(names(TMPRSS2_cells),names(ACE2_cells)))
ggplot(data=new_data, aes(x=cell_types, y=cells_percenrage, fill=genes)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 4 rows containing missing values (position_stack).

### Bar plot of intersect of both ACE2 and CTSL
col_name_CTSL=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])]
col_name_ACE2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])]
intersect_names=intersect(names(col_name_ACE2),names(col_name_CTSL))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[12,names(intersect_cells)]=intersect_cells
CTSL_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])])
ACE2_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])])
count_matrix[13,names(ACE2_cells)]=(intersect_cells/ACE2_cells)*100
count_matrix[14,names(CTSL_cells)]=(intersect_cells/CTSL_cells)*100
new_data=data.frame("cells_percenrage"=c(((intersect_cells/CTSL_cells)*100),((intersect_cells/ACE2_cells)*100)),
                    genes=c(rep("CTSL",length(CTSL_cells)),rep("ACE2",length(ACE2_cells))),
                    cell_types=c(names(CTSL_cells),names(ACE2_cells)))
ggplot(data=new_data, aes(x=cell_types, y=cells_percenrage, fill=genes)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Removed 4 rows containing missing values (position_stack).

### Bar plot of intersect of both BSG and TMPRSS2
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_BSG=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])]
intersect_names=intersect(names(col_name_BSG),names(col_name_TMPRSS2))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[15,names(intersect_cells)]=intersect_cells
TMPRSS2_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])])
BSG_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])])
count_matrix[16,names(BSG_cells)]=(intersect_cells/BSG_cells)*100
count_matrix[17,names(TMPRSS2_cells)]=(intersect_cells/TMPRSS2_cells)*100
new_data=data.frame("cells_percenrage"=c(((intersect_cells/TMPRSS2_cells)*100),((intersect_cells/BSG_cells)*100)),
                    genes=c(rep("TMPRSS2",length(TMPRSS2_cells)),rep("BSG",length(BSG_cells))),
                    cell_types=c(names(TMPRSS2_cells),names(BSG_cells)))
ggplot(data=new_data, aes(x=cell_types, y=cells_percenrage, fill=genes)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### Bar plot of intersect of both BSG and CTSL
col_name_CTSL=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])]
col_name_BSG=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])]
intersect_names=intersect(names(col_name_BSG),names(col_name_CTSL))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[18,names(intersect_cells)]=intersect_cells
CTSL_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])])
BSG_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])])
count_matrix[19,names(BSG_cells)]=(intersect_cells/BSG_cells)*100
count_matrix[20,names(CTSL_cells)]=(intersect_cells/CTSL_cells)*100
new_data=data.frame("cells_percenrage"=c(((intersect_cells/CTSL_cells)*100),((intersect_cells/BSG_cells)*100)),
                    genes=c(rep("CTSL",length(CTSL_cells)),rep("BSG",length(BSG_cells))),
                    cell_types=c(names(CTSL_cells),names(BSG_cells)))
ggplot(data=new_data, aes(x=cell_types, y=cells_percenrage, fill=genes)) +
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### Intersect of ACE2, TMPRSS2 and CTSL
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_ACE2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])]
col_name_CTSL=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])]
intersect_names=intersect(names(col_name_ACE2),names(col_name_TMPRSS2))
intersect_names=intersect(intersect_names,names(col_name_CTSL))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[21,names(intersect_cells)]=intersect_cells
### Intersect of ACE2, TMPRSS2 and CTSB
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_ACE2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])]
col_name_CTSB=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSB",])>0])]
intersect_names=intersect(names(col_name_ACE2),names(col_name_TMPRSS2))
intersect_names=intersect(intersect_names,names(col_name_CTSB))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[22,names(intersect_cells)]=intersect_cells
### Intersect of BSG, TMPRSS2 and CTSL
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_BSG=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])]
col_name_CTSL=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSL",])>0])]
intersect_names=intersect(names(col_name_BSG),names(col_name_TMPRSS2))
intersect_names=intersect(intersect_names,names(col_name_CTSL))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[23,names(intersect_cells)]=intersect_cells
### Intersect of BSG, TMPRSS2 and CTSB
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_BSG=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["BSG",])>0])]
col_name_CTSB=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["CTSB",])>0])]
intersect_names=intersect(names(col_name_BSG),names(col_name_TMPRSS2))
intersect_names=intersect(intersect_names,names(col_name_CTSB))
intersect_cells=table(Seurat::Idents(run.combined)[intersect_names])
count_matrix[24,names(intersect_cells)]=intersect_cells
count_matrix
##                                 Immature neurons Sustentacular cells
## ACE2_count                             0.0000000           15.000000
## ACE2_percent                           0.0000000           46.875000
## TMPRSS2_count                          5.0000000          345.000000
## TMPRSS2_percent                        0.7507508           51.801802
## CTSL_count                           167.0000000          360.000000
## CTSL_percent                          10.2391171           22.072348
## BSG_count                            347.0000000          622.000000
## BSG_percent                           11.5129396           20.637027
## intersect_of_ACE2_TMPRSS2_count        0.0000000            9.000000
## ACE2_contri_percent                          NaN           60.000000
## TMPRSS2_contri_percent                 0.0000000            2.608696
## intersect_of_ACE2_CTSL_count           0.0000000           11.000000
## ACE2_contri_percent                          NaN           73.333333
## CTSL_contri_percent                    0.0000000            3.055556
## intersect_of_BSG_TMPRSS2_count         5.0000000          334.000000
## BSG_contri_percent                     1.4409222           53.697749
## TMPRSS2_contri_percent               100.0000000           96.811594
## intersect_of_BSG_CTSL_count          148.0000000          347.000000
## BSG_contri_percent                    42.6512968           55.787781
## CSTL_contri_percent                   88.6227545           96.388889
## ACE2_TMPRSS2_CTSL                      0.0000000            7.000000
## ACE2_TMPRSS2_CTSB                      0.0000000            8.000000
## BSB_TMPRSS2_CTSL                       2.0000000          217.000000
## BSG_TMPRSS2_CTSB                       3.0000000          310.000000
##                                 Olfactory HBCs Mature neurons
## ACE2_count                          11.0000000      0.0000000
## ACE2_percent                        34.3750000      0.0000000
## TMPRSS2_count                       14.0000000      2.0000000
## TMPRSS2_percent                      2.1021021      0.3003003
## CTSL_count                         507.0000000    112.0000000
## CTSL_percent                        31.0852238      6.8669528
## BSG_count                          687.0000000    249.0000000
## BSG_percent                         22.7936297      8.2614466
## intersect_of_ACE2_TMPRSS2_count      0.0000000      0.0000000
## ACE2_contri_percent                  0.0000000            NaN
## TMPRSS2_contri_percent               0.0000000      0.0000000
## intersect_of_ACE2_CTSL_count         4.0000000      0.0000000
## ACE2_contri_percent                 36.3636364            NaN
## CTSL_contri_percent                  0.7889546      0.0000000
## intersect_of_BSG_TMPRSS2_count      13.0000000      2.0000000
## BSG_contri_percent                   1.8922853      0.8032129
## TMPRSS2_contri_percent              92.8571429    100.0000000
## intersect_of_BSG_CTSL_count        342.0000000    109.0000000
## BSG_contri_percent                  49.7816594     43.7751004
## CSTL_contri_percent                 67.4556213     97.3214286
## ACE2_TMPRSS2_CTSL                    0.0000000      0.0000000
## ACE2_TMPRSS2_CTSB                    0.0000000      0.0000000
## BSB_TMPRSS2_CTSL                     7.0000000      2.0000000
## BSG_TMPRSS2_CTSB                     6.0000000      1.0000000
##                                 Olfactory ensheathing glia Bowman's gland
## ACE2_count                                       0.0000000      4.0000000
## ACE2_percent                                     0.0000000     12.5000000
## TMPRSS2_count                                    6.0000000    239.0000000
## TMPRSS2_percent                                  0.9009009     35.8858859
## CTSL_count                                      90.0000000    281.0000000
## CTSL_percent                                     5.5180871     17.2286941
## BSG_count                                      157.0000000    772.0000000
## BSG_percent                                      5.2090246     25.6138023
## intersect_of_ACE2_TMPRSS2_count                  0.0000000      1.0000000
## ACE2_contri_percent                                    NaN     25.0000000
## TMPRSS2_contri_percent                           0.0000000      0.4184100
## intersect_of_ACE2_CTSL_count                     0.0000000      2.0000000
## ACE2_contri_percent                                    NaN     50.0000000
## CTSL_contri_percent                              0.0000000      0.7117438
## intersect_of_BSG_TMPRSS2_count                   5.0000000    228.0000000
## BSG_contri_percent                               3.1847134     29.5336788
## TMPRSS2_contri_percent                          83.3333333     95.3974895
## intersect_of_BSG_CTSL_count                     68.0000000    266.0000000
## BSG_contri_percent                              43.3121019     34.4559585
## CSTL_contri_percent                             75.5555556     94.6619217
## ACE2_TMPRSS2_CTSL                                0.0000000      0.0000000
## ACE2_TMPRSS2_CTSB                                0.0000000      1.0000000
## BSB_TMPRSS2_CTSL                                 2.0000000     91.0000000
## BSG_TMPRSS2_CTSB                                 1.0000000    215.0000000
##                                 Olfactory microvillar cells       GBCs
## ACE2_count                                         0.000000   2.000000
## ACE2_percent                                       0.000000   6.250000
## TMPRSS2_count                                     16.000000  39.000000
## TMPRSS2_percent                                    2.402402   5.855856
## CTSL_count                                        59.000000  55.000000
## CTSL_percent                                       3.617413   3.372164
## BSG_count                                        113.000000  67.000000
## BSG_percent                                        3.749171   2.222960
## intersect_of_ACE2_TMPRSS2_count                    0.000000   2.000000
## ACE2_contri_percent                                     NaN 100.000000
## TMPRSS2_contri_percent                             0.000000   5.128205
## intersect_of_ACE2_CTSL_count                       0.000000   1.000000
## ACE2_contri_percent                                     NaN  50.000000
## CTSL_contri_percent                                0.000000   1.818182
## intersect_of_BSG_TMPRSS2_count                    16.000000  37.000000
## BSG_contri_percent                                14.159292  55.223881
## TMPRSS2_contri_percent                           100.000000  94.871795
## intersect_of_BSG_CTSL_count                       57.000000  50.000000
## BSG_contri_percent                                50.442478  74.626866
## CSTL_contri_percent                               96.610169  90.909091
## ACE2_TMPRSS2_CTSL                                  0.000000   1.000000
## ACE2_TMPRSS2_CTSB                                  0.000000   1.000000
## BSB_TMPRSS2_CTSL                                   9.000000  32.000000
## BSG_TMPRSS2_CTSB                                  11.000000  33.000000
## Bar plots to show + expressed cells of some gene markers as shown beelow
### Bar stack plot of ACE2, TMPRSS2 and (ACE2 and TMPRSS2)
count=c(count_matrix["ACE2_count",],count_matrix["TMPRSS2_count",],count_matrix["intersect_of_ACE2_TMPRSS2_count",])
count[count==0]<-1
A_T_AT_df=data.frame("count"=count,"cell_markers"= names(count),
                     "gene_markers"=c(rep("ACE2",8),rep("TMPRSS2",8),rep("Intersect (ACE2 and TMPRSS2)",8)))
ggplot(data=A_T_AT_df, aes(x=cell_markers, y=count, fill=gene_markers,width=.7)) +
  geom_bar(stat="identity")+
  theme_minimal() + scale_fill_manual(values=c('red','green',"blue")) +
  scale_y_continuous(trans='log10') +
  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### Bar stack plot of BSG, CTSL and (BSG and CTSL)
count=c(count_matrix["BSG_count",],count_matrix["CTSL_count",],count_matrix["intersect_of_BSG_CTSL_count",])
count[count==0]<-1
B_C_BC_df=data.frame("count"=count,"cell_markers"= names(count),
                     "gene_markers"=c(rep("BSG",8),rep("CTSL",8),rep("Intersect (BSG and CTSL)",8)))
ggplot(data=B_C_BC_df, aes(x=cell_markers, y=count, fill=gene_markers,width=.7)) +
  geom_bar(stat="identity")+
  theme_minimal() + scale_fill_manual(values=c('red','green',"blue")) +
  scale_y_continuous(trans='log10') +
  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### Bar stack plot of BSG, CTSL and (BSG and CTSL)
count=c(count_matrix["ACE2_count",],count_matrix["CTSL_count",],count_matrix["intersect_of_ACE2_CTSL_count",])
count[count==0]<-1
A_C_AC_df=data.frame("count"=count,"cell_markers"= names(count),
                     "gene_markers"=c(rep("ACE2",8),rep("CTSL",8),rep("Intersect (ACE2 and CTSL)",8)))
ggplot(data=A_C_AC_df, aes(x=cell_markers, y=count, fill=gene_markers,width=.7)) +
  geom_bar(stat="identity")+
  theme_minimal() + scale_fill_manual(values=c('red','green',"blue")) +
  scale_y_continuous(trans='log10') +
  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### Bar stack plot of BSG, TMPRSS2 and (BSG and TMPRSS2)
count=c(count_matrix["BSG_count",],count_matrix["TMPRSS2_count",],count_matrix["intersect_of_BSG_TMPRSS2_count",])
count[count==0]<-1
B_T_BT_df=data.frame("count"=count,"cell_markers"= names(count),
                     "gene_markers"=c(rep("BSG",8),rep("TMPRSS2",8),rep("Intersect (BSG and TMPRSS2)",8)))
ggplot(data=B_T_BT_df, aes(x=cell_markers, y=count, fill=gene_markers,width=.7)) +
  geom_bar(stat="identity")+
  theme_minimal() + scale_fill_manual(values=c('red','green',"blue")) +
  scale_y_continuous(trans='log10') +
  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

## DE genes analysis for selected cell types
run.combined=sce_3_1_1_after_5000_23
DefaultAssay(run.combined) <- "RNA"
seurat_subset_list=list()
p_val=0.05
l_fold=1
de_methods=c("poisson")
### DE genes analysis on intersection of (ACE2 and TMPRSS2)+ and (ACE2 and TMPRSS2)-
gene_marker=c("ACE2","TMPRSS2")
print(sort(count_matrix[1,]))
##            Immature neurons              Mature neurons 
##                           0                           0 
##  Olfactory ensheathing glia Olfactory microvillar cells 
##                           0                           0 
##                        GBCs              Bowman's gland 
##                           2                           4 
##              Olfactory HBCs         Sustentacular cells 
##                          11                          15
selected_cells_type=sort(count_matrix[9,])
selected_cells_type=selected_cells_type[selected_cells_type>2]
for(k in de_methods)
  for(i in 1:length(selected_cells_type))
  {
    print(paste(k,"_",i,sep=""))
    seurat_subset=subset(run.combined, idents = names(selected_cells_type)[i])
    positive_cells_name = colnames(seurat_subset[,(as.matrix(seurat_subset@assays$RNA[gene_marker[1],])>0 & as.matrix(seurat_subset@assays$RNA[gene_marker[2],])>0)])
    negative_cells_name = colnames(seurat_subset[,(as.matrix(seurat_subset@assays$RNA[gene_marker[1],])<=0 | as.matrix(seurat_subset@assays$RNA[gene_marker[2],])<=0)])
    pos_cells=paste(gene_marker[1],"_and_",gene_marker[2],"+",sep="")
    neg_cells=paste(gene_marker[1],"_and_",gene_marker[2],"-",sep="")
    pos=c(rep(pos_cells,length(positive_cells_name)))
    names(pos)=positive_cells_name
    neg=c(rep(neg_cells,length(negative_cells_name)))
    names(neg)=negative_cells_name
    pos_neg=c(pos,neg)
    print(paste("cells in seurat object after subsetting with ",names(selected_cells_type)[i]," = ",dim(seurat_subset)[2],sep=""))
    print(paste(names(selected_cells_type)[i]," total cells = ",sum(pos_neg[which(names(pos_neg)%in%rownames(seurat_subset@meta.data))]==pos_neg),sep=""))
    print(paste(pos_cells," cells = ",sum(pos_neg==pos_cells),sep=""))
    print(paste(neg_cells," cells = ",sum(pos_neg==neg_cells),sep=""))
    seurat_subset=AddMetaData(
      object = seurat_subset,
      metadata = pos_neg,
      col.name = 'seurat_subset'
    )
    seurat_subset_list[[paste(names(selected_cells_type)[i],"_",gene_marker[1],"_and_",gene_marker[2],sep="")]]<-seurat_subset
    Idents(seurat_subset)<-pos_neg
    seurat_subset_genes=FindMarkers(seurat_subset, ident.1=pos_cells,ident.2=neg_cells,test.use = k,min.cells.group=1)
    print(seurat_subset_genes[1:2,])
    seurat_subset_genes_up=rownames(seurat_subset_genes)[seurat_subset_genes$avg_logFC>l_fold & seurat_subset_genes$p_val<p_val]
    seurat_subset_genes_down=rownames(seurat_subset_genes)[seurat_subset_genes$avg_logFC<(-l_fold) & seurat_subset_genes$p_val<p_val]
    seurat_subset_genes_names=c(seurat_subset_genes_up,seurat_subset_genes_down)
    print(paste(names(selected_cells_type)[i],"_",pos_cells," genes = ",length(seurat_subset_genes_up),
                " and ",names(selected_cells_type)[i],"_",neg_cells," genes = ",length(seurat_subset_genes_down),sep=""))
    DE_info=data.frame("log2FC"=as.numeric(seurat_subset_genes$avg_logFC),"p_val"=as.numeric(seurat_subset_genes$p_val))
    lab=rownames(seurat_subset_genes)
    print(EnhancedVolcano(DE_info,
                          lab = lab,
                          x = 'log2FC',
                          y = 'p_val',
                          xlim = c(-5, 5),
                          title = names(selected_cells_type)[i],
                          pCutoff = p_val,
                          FCcutoff = l_fold,
                          colAlpha = 1))
  }
## [1] "poisson_1"
## [1] "cells in seurat object after subsetting with Sustentacular cells = 737"
## [1] "Sustentacular cells total cells = 737"
## [1] "ACE2_and_TMPRSS2+ cells = 9"
## [1] "ACE2_and_TMPRSS2- cells = 728"
## Warning in FUN(X[[i]], ...): Skipping gene --- 1650. Fewer than 3 cells in both
## clusters.
## Warning in FUN(X[[i]], ...): Skipping gene --- 1780. Fewer than 3 cells in both
## clusters.
## Warning in FUN(X[[i]], ...): Skipping gene --- 1957. Fewer than 3 cells in both
## clusters.
##         p_val avg_logFC pct.1 pct.2 p_val_adj
## SCGB1A1     0  3.269476 0.111 0.058         0
## BPIFB1      0  2.985891 0.111 0.063         0
## [1] "Sustentacular cells_ACE2_and_TMPRSS2+ genes = 27 and Sustentacular cells_ACE2_and_TMPRSS2- genes = 2"
## Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-
## zero p-value...

## Finding Stouffer score after log transfer and zscore
### Loading seaurat object
sce_3_1_1_after_5000_23 <- readRDS("~/corona_project/sce_3_1_1_after_5000_23.rds")
mat<-sce_3_1_1_after_5000_23@assays$RNA@counts
## cell filtering
cells_sum <- Matrix::colSums(mat>=3)
mat<-mat[,intersect(which(cells_sum>=stats::quantile(cells_sum,probs = 0.001)), which(cells_sum<=stats::quantile(cells_sum,probs = 1)))]
## gene filtering
mat<-mat[which(Matrix::rowSums(mat > 2) > 3),]
## median normalization
cells_sum<-Matrix::rowSums(t(mat))
mat<-Matrix::t(t(mat)/(cells_sum/stats::median(cells_sum)))
### Loading media genes file and intersecting genes and filtering matrix common genes
media_genes <- read_excel("~/corona_project/media-6.xlsx")$...3[-1]
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...
common_genes=intersect(rownames(mat),media_genes)
mat=mat[common_genes,]
### Further insuring have cells with atleast 10% expressed genes
mat<-mat[,Matrix::colSums(mat>0)>(dim(mat)[1]/10)]
print(dim(mat))
## [1]  293 3493
seurat_RNA_mat<-mat
print(sum(apply(seurat_RNA_mat,1,function(x) sum(x)==0)))
## [1] 0
print(sum(apply(seurat_RNA_mat,2,function(x) sum(x)==0)))
## [1] 0
print(dim(seurat_RNA_mat))
## [1]  293 3493
### First adding 1 (adding 1 to only zero, not only for zeros) then log2 then zscore
seurat_RNA_mat[seurat_RNA_mat==0]=1
seurat_RNA_mat<-log2(seurat_RNA_mat)
seurat_RNA_mat<-t(apply(seurat_RNA_mat,1,function(x) x*mean(x)/sd(x)))
### Stouffer score and then boxplot for each cell type
stouffer_score<- apply(seurat_RNA_mat,2,function(x) sum(x)/sqrt(length(x)))
print(sum(is.na(stouffer_score)))
## [1] 0
cell_types<-Idents(sce_3_1_1_after_5000_23)[names(stouffer_score)]
unique_cell_types=unique(cell_types)
Stouffer_score_df<-data.frame("Stouffer_score"=stouffer_score, "cell_types"=cell_types)
ggplot(Stouffer_score_df, aes(x=cell_types, y=Stouffer_score, fill=cell_types)) +
  geom_boxplot(position=position_dodge(.2)) +
  geom_jitter(shape=16, position=position_jitter(.1)) +
  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### One sided wilcoxon test
p_val_stouffer_score=matrix(0,nrow=length(unique_cell_types),ncol=length(unique_cell_types))
colnames(p_val_stouffer_score)=unique_cell_types
rownames(p_val_stouffer_score)=unique_cell_types
for (i in 1:dim(p_val_stouffer_score)[1])
{
  for (j in 1:dim(p_val_stouffer_score)[2])
  {
    print(paste(i,"_",j,sep=""))
    a=stouffer_score[names(cell_types)[cell_types==unique_cell_types[i]]]
    b=stouffer_score[names(cell_types)[cell_types==unique_cell_types[j]]]
    p_val_stouffer_score[i,j]=wilcox.test(a,b,alternative = "greater")$p.value
  }
}
## [1] "1_1"
## [1] "1_2"
## [1] "1_3"
## [1] "1_4"
## [1] "1_5"
## [1] "1_6"
## [1] "1_7"
## [1] "1_8"
## [1] "2_1"
## [1] "2_2"
## [1] "2_3"
## [1] "2_4"
## [1] "2_5"
## [1] "2_6"
## [1] "2_7"
## [1] "2_8"
## [1] "3_1"
## [1] "3_2"
## [1] "3_3"
## [1] "3_4"
## [1] "3_5"
## [1] "3_6"
## [1] "3_7"
## [1] "3_8"
## [1] "4_1"
## [1] "4_2"
## [1] "4_3"
## [1] "4_4"
## [1] "4_5"
## [1] "4_6"
## [1] "4_7"
## [1] "4_8"
## [1] "5_1"
## [1] "5_2"
## [1] "5_3"
## [1] "5_4"
## [1] "5_5"
## [1] "5_6"
## [1] "5_7"
## [1] "5_8"
## [1] "6_1"
## [1] "6_2"
## [1] "6_3"
## [1] "6_4"
## [1] "6_5"
## [1] "6_6"
## [1] "6_7"
## [1] "6_8"
## [1] "7_1"
## [1] "7_2"
## [1] "7_3"
## [1] "7_4"
## [1] "7_5"
## [1] "7_6"
## [1] "7_7"
## [1] "7_8"
## [1] "8_1"
## [1] "8_2"
## [1] "8_3"
## [1] "8_4"
## [1] "8_5"
## [1] "8_6"
## [1] "8_7"
## [1] "8_8"
p_val_stouffer_score
##                             Immature neurons Sustentacular cells Olfactory HBCs
## Immature neurons                5.000590e-01           1.0000000   6.811125e-01
## Sustentacular cells             9.219790e-93           0.5000307  1.985234e-158
## Olfactory HBCs                  3.189371e-01           1.0000000   5.000145e-01
## Mature neurons                  2.301533e-22           1.0000000   3.187886e-33
## Bowman's gland                  2.862754e-67           1.0000000  8.328144e-124
## Olfactory ensheathing glia      1.000000e+00           1.0000000   1.000000e+00
## Olfactory microvillar cells     7.753016e-16           1.0000000   4.410108e-20
## GBCs                            9.681461e-22           0.5528384   2.283045e-25
##                             Mature neurons Bowman's gland
## Immature neurons              1.000000e+00   1.000000e+00
## Sustentacular cells           1.607802e-29   7.369880e-16
## Olfactory HBCs                1.000000e+00   1.000000e+00
## Mature neurons                5.001178e-01   1.000000e+00
## Bowman's gland                2.329901e-10   5.000230e-01
## Olfactory ensheathing glia    1.000000e+00   1.000000e+00
## Olfactory microvillar cells   1.310813e-01   9.989642e-01
## GBCs                          1.740285e-08   1.047303e-03
##                             Olfactory ensheathing glia
## Immature neurons                          3.942532e-13
## Sustentacular cells                       4.852690e-81
## Olfactory HBCs                            2.410429e-22
## Mature neurons                            8.185962e-41
## Bowman's gland                            7.606873e-71
## Olfactory ensheathing glia                5.001834e-01
## Olfactory microvillar cells               2.212322e-29
## GBCs                                      1.775410e-28
##                             Olfactory microvillar cells      GBCs
## Immature neurons                           1.000000e+00 1.0000000
## Sustentacular cells                        3.844449e-12 0.4473999
## Olfactory HBCs                             1.000000e+00 1.0000000
## Mature neurons                             8.691373e-01 1.0000000
## Bowman's gland                             1.037161e-03 0.9989545
## Olfactory ensheathing glia                 1.000000e+00 1.0000000
## Olfactory microvillar cells                5.003853e-01 0.9999836
## GBCs                                       1.664388e-05 0.5007650
### Scatter plot and Pearson correlation with the average vectors from patient 2 and patient 3 
#### 5 average vector for patient 2 and 5 from patient 3 as "ACE2+","TMPRSS2+","BSG+","CTSL+","All_cells"
sce_3_1_1_after_5000_23 <- readRDS("~/corona_project/sce_3_1_1_after_5000_23.rds")
run=sce_3_1_1_after_5000_23@meta.data$run
for(i in c("ACE2","TMPRSS2","BSG","CTSL","All"))
{
  print(i)
  P2=sce_3_1_1_after_5000_23@assays$RNA@counts[,run=="P2"]
  dim(P2)
  if (i!="All")
    P2=P2[,P2[i,]>0]
  P2[P2==0]<-1
  P2=t(apply(P2,1,function(x) log2(x)))
  dim(P2)
  P2=rowMeans(as.matrix(P2))
  length(P2)
  P3=sce_3_1_1_after_5000_23@assays$RNA@counts[,run=="P3"]
  dim(P3)
  if (i!="All")
    P3=P3[,P3[i,]>0]
  P3[P3==0]<-1
  P3=t(apply(P3,1,function(x) log2(x)))
  dim(P3)
  P3=rowMeans(as.matrix(P3))
  length(P3)
  corr=cor(P2,P3,method="pearson")
  df=data.frame("P2"=P2,"P3"=P3)
  print(ggplot(df, aes(x=P2, y=P3)) + 
    geom_point(shape=18, color="blue")+
    geom_smooth(method=lm,  linetype="dashed",
                color="darkred", fill="blue")+
    ggtitle(paste("pearson correlation: ",corr,sep=""))+
    xlab("Patient 2")+ylab("Patient 3")+theme_classic())
}
## [1] "ACE2"
## `geom_smooth()` using formula 'y ~ x'

## [1] "TMPRSS2"
## `geom_smooth()` using formula 'y ~ x'

## [1] "BSG"
## `geom_smooth()` using formula 'y ~ x'

## [1] "CTSL"
## `geom_smooth()` using formula 'y ~ x'

## [1] "All"
## `geom_smooth()` using formula 'y ~ x'